
setwd("~/data")
reg.data1a <- read.csv("SM_CalibratingAutonomy.csv")
reg.data1a$Education <- as.factor(reg.data1a$Education)

library(multiwayvcov)  # Required for cluster.boot()
library(huxtable)  # Required for as_hux(), set_borders(), set_italic(), quick_docx()

#### Calculate the coefficient estimates and bootstrapped standard errors
block_bootstrap_coef <- function(model_formula, data, cluster_variable){
  
  model <- lm(model_formula, data = data)
  coefficients <- summary(model)$coefficients
  
  # Bootstrap variance-covariance matrix of coefficients using cluster.boot()
  boot_vcov <- multiwayvcov::cluster.boot(model, cluster = data[[cluster_variable]], R = 1000)
  
  # Edit the standard errors and p values in the coefficients table
  coefficients[,2] <- sqrt(diag(boot_vcov))
  coefficients[,3] <- coefficients[,1]/coefficients[,2]
  coefficients[,4] <- (1-pnorm(q=abs(coefficients[,3])))*2
  colnames(coefficients) <- c("Estimate", "St. Error", "z", "p")
  
  # View the results
  return(list(coefficients = coefficients, variance.covariance.matrix = boot_vcov))
}



#### Calculate predicted values and plot them ####
plot_predictions <- function(model_formula, data, vcov, plot_filenames){
  model <- lm(model_formula, data = data)
  
  x_variable_names <- c("Independence", "Discretion")
  new_data_list <- vector("list", length(x_variable_names))
  names(new_data_list) <- x_variable_names
  
  # Repeat this block of code twice: once for Independence and again for Discretion
  v <- 2
  for(v in 1:length(x_variable_names)){
    x_variable_name <- x_variable_names[v]
    
    
    #### Prepare the new data for which predicted values are to be calculated
    
    # Specify the desired values of the covariates for the predictions
    prediction_data <- list(seq(from=-2, to=2, by=.1),
                            seq(from=-1, to=1, by= 1))
    names(prediction_data) <- c(x_variable_name, "Capacity")
    
    # Create new data frame for prediction with just the covariates we're varying
    new_data <- expand.grid(prediction_data)
    
    # Calculate the means of the remaining covariates to be included in the new_data
    original_data <- model.frame(model)
    covariate_means <- colMeans(original_data[, !(names(original_data) %in% "(Intercept)")])
    
    # Adjust for polynomial terms. We don't want the mean of the squared term. We want the square of the mean of the base term.
    poly_vars <- grep("^I\\((.+)\\^2\\)$", names(covariate_means), value = TRUE, perl = TRUE)
    for (poly_var in poly_vars) {
      base_var <- gsub("^I\\((.+)\\^2\\)$", "\\1", poly_var, perl = TRUE)
      if (base_var %in% names(covariate_means)) {
        covariate_means[poly_var] <- covariate_means[base_var]^2
      }
    }
    
    # Add the means of the covariates to the new_data 
    for (var in names(covariate_means)) {
      if (!var %in% names(new_data)) {
        new_data[[var]] <- covariate_means[var]
      }
    }
    
    
    
    
    #### Calculate predicted values for the new data, with confidence intervals
    
    # Calculate predicted values
    new_data$predicted <- predict(model, newdata = new_data)
    
    # Compute the standard errors for those predictions, using the bootstrapped variance-covariance matrix
    X_new <- model.matrix(model_formula, data = new_data)
    SE_pred <- sqrt(diag(X_new %*% vcov %*% t(X_new)))
    new_data$se <- SE_pred
    
    # Compute the confidence intervals
    alpha <- 0.05
    z_value <- qnorm(1 - alpha/2)
    new_data$CI_low <- new_data$predicted - z_value * SE_pred
    new_data$CI_high <- new_data$predicted + z_value * SE_pred
    
    # View the results
    new_data
    new_data_list[[v]] <- new_data
    
    
    
    #### Plot the predicted values and confidence intervals

    png(filename = plot_filenames[v], width = 5.5, height = 4.0, units = "in", res = 600)
    par(mar = c(5,5,3,8))
    plot(x = NULL, y = NULL,
         xlim = c(-2,2),
         ylim = c(2,4.1),
         xlab = x_variable_name,
         ylab = "Quality",
         bty = "n",
    )
    grid()
    
    predicted_values_list <- list(new_data[1:41,],
                                  new_data[42:82,],
                                  new_data[83:123,])
    colors <- c("gray1", "gray30", "gray60")
    for(c in 1:length(predicted_values_list)){
      predictions <- predicted_values_list[[c]]
      colnames(predictions)[colnames(predictions) == x_variable_name] <- "x"
      
      polygon(x = c(predictions$x, rev(predictions$x)),
              y = c(predictions$CI_low, rev(predictions$CI_high)),
              col = adjustcolor(colors[c], alpha.f = .3),
              border = NA)
      lines(x = predictions$x,
            y = predictions$predicted,
            lwd = 2,
            col = colors[c])
    }
    legend(x = "topright", 
           inset = c(-.5,0),
           col = rev(colors),
           lwd = 2,
           legend = c("Capacity = 1",
                      "Capacity = 0",
                      "Capacity = -1"),
           xpd = TRUE,
           cex = .8,
           xjust = 0
           
    )
    dev.off()
  }
  return(new_data_list)
}


word_table <- function(table, filename){
  # Convert to huxtable 
  ht <- huxtable::as_hux(table, add_colnames = TRUE, add_rownames = TRUE)
  
  # Left-justify the row names
  ht <- huxtable::set_align(ht, row = everywhere, col = 1, value = "left")
  
  # Replace the word "rownames" with an empty string
  ht[1, 1] <- ""
  
  # Set the desired borders
  ht <- huxtable::set_all_borders(ht, 0)                       # Remove all borders first
  ht <- huxtable::set_top_border(ht, row = 1, col = everywhere, value = 0.4)    # Top border
  ht <- huxtable::set_bottom_border(ht, row = 1, col = everywhere, value = 0.4)    # Border under the header
  ht <- huxtable::set_bottom_border(ht, row = nrow(ht), col = everywhere, value = 0.4)    # Top border
  
  # Italicize the headers
  ht <- huxtable::set_italic(ht, 1, everywhere, TRUE)
  
  # Export to Word
  huxtable::quick_docx(ht, file = filename)
}





### Model 1
model_formula <- formula(Quality ~ Independence + I(Independence^2) + Discretion + I(Discretion^2))
model1.bootstrap <- block_bootstrap_coef(model_formula = model_formula, 
                                         data = reg.data1a, 
                                         cluster_variable = "AGID")
model1.bootstrap$coefficients
word_table(model1.bootstrap$coefficients, filename = "ols_bootstrap_without_controls.docx")


# These two models mimic the lmer models tested above 

### Model 2
model_formula <- formula(Quality~Independence + I(Independence^2) + Discretion + I(Discretion^2)+ Appointee + Gender + Years + Education + Fed.District +Party)
model2.bootstrap <- block_bootstrap_coef(model_formula = model_formula, 
                                         data = reg.data1a, 
                                         cluster_variable = "AGID")
model2.bootstrap$coefficients
word_table(model2.bootstrap$coefficients, filename = "ols_bootstrap_with_controls.docx")


### Model 3
model_formula <- formula(Quality ~ Independence * Capacity + Discretion * Capacity + Capacity * I(Discretion^2))
model3.bootstrap <- block_bootstrap_coef(model_formula = model_formula, 
                                         data = reg.data1a, 
                                         cluster_variable = "AGID")
model3.bootstrap$coefficients
word_table(model3.bootstrap$coefficients, filename = "ols_bootstrap_interaction_without_controls.docx")
plot_predictions(model_formula = model_formula, data = reg.data1a, 
                 vcov = model3.bootstrap$variance.covariance.matrix,
                 plot_filenames = c("ols_bootstrap_interaction_capacity_independence_without_controls.png",
                                    "ols_bootstrap_interaction_capacity_discretion_without_controls.png")
                 )



### Model 4
# Convert factor variables into numeric to enable us to calculate their means later when generating the new_data for the predicted values
reg.data1a$Edu1 <- as.numeric(reg.data1a$Education == 1)
reg.data1a$Edu2 <- as.numeric(reg.data1a$Education == 2)
reg.data1a$Edu3 <- as.numeric(reg.data1a$Education == 3)
reg.data1a$Edu4 <- as.numeric(reg.data1a$Education == 4)
reg.data1a$Edu5 <- as.numeric(reg.data1a$Education == 5)
reg.data1a$Appointee.numeric <- as.numeric(reg.data1a$Appointee)
reg.data1a$Gender.numeric <- as.numeric(reg.data1a$Gender)
reg.data1a$Fed.District.numeric <- as.numeric(reg.data1a$Fed.District)
reg.data1a$Party.numeric <- as.numeric(reg.data1a$Party)

model_formula <- formula(Quality~Independence*Capacity + Discretion*Capacity + Capacity*I(Discretion^2)+ Appointee.numeric + Gender.numeric + Years + Edu2 + Edu3 + Edu4 + Edu5 + Fed.District.numeric +Party.numeric )
model4.bootstrap <- block_bootstrap_coef(model_formula = model_formula, 
                                         data = reg.data1a, 
                                         cluster_variable = "AGID")
model4.bootstrap$coefficients
word_table(model4.bootstrap$coefficients, filename = "ols_bootstrap_interaction_with_controls.docx")
plot_predictions(model_formula = model_formula, data = reg.data1a, 
                 vcov = model4.bootstrap$variance.covariance.matrix,
                 plot_filenames = c("ols_bootstrap_interaction_capacity_independence_with_controls.png",
                                    "ols_bootstrap_interaction_capacity_discretion_with_controls.png")
                 )



